Chapter 8 HMSC analysis

8.1 Load data

load("data/data.Rdata")
load("hmsc/fit_model1_250_10.Rdata")

8.2 Variance partitioning

# Compute variance partitioning
varpart=computeVariancePartitioning(m)

varpart$vals %>%
   as.data.frame() %>%
   rownames_to_column(var="variable") %>%
   pivot_longer(!variable, names_to = "genome", values_to = "value") %>%
   mutate(variable=factor(variable, levels=rev(c("origin","sex","logseqdepth","Random: location")))) %>%
   group_by(variable) %>%
   summarise(mean=mean(value)*100,sd=sd(value)*100) %>%
   tt()
tinytable_6adu6h7wfv61ym200i4g
variable mean sd
Random: location 37.900015 25.317903
logseqdepth 56.110626 25.796874
sex 4.937460 5.612719
origin 1.051899 1.282563
# Basal tree
varpart_tree <- genome_tree

#Varpart table
varpart_table <- varpart$vals %>%
   as.data.frame() %>%
   rownames_to_column(var="variable") %>%
   pivot_longer(!variable, names_to = "genome", values_to = "value") %>%
   mutate(genome=factor(genome, levels=rev(varpart_tree$tip.label))) %>%
   mutate(variable=factor(variable, levels=rev(c("origin","sex","logseqdepth","Random: location"))))

#Phylums
phylum_colors <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
  right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
    filter(genome %in% varpart_tree$tip.label) %>%
    arrange(match(genome, varpart_tree$tip.label)) %>%
    mutate(phylum = factor(phylum, levels = unique(phylum))) %>%
    column_to_rownames(var = "genome") %>%
    select(phylum)

colors_alphabetic <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
  right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
    filter(genome %in% varpart_tree$tip.label) %>%
    arrange(match(genome, varpart_tree$tip.label)) %>%
     select(phylum, colors) %>%
    unique() %>%
    arrange(phylum) %>%
    select(colors) %>%
    pull()

# Basal ggtree
varpart_tree <- varpart_tree %>%
        force.ultrametric(.,method="extend") %>%
        ggtree(., size = 0.3)
***************************************************************
*                          Note:                              *
*    force.ultrametric does not include a formal method to    *
*    ultrametricize a tree & should only be used to coerce    *
*   a phylogeny that fails is.ultrametric due to rounding --  *
*    not as a substitute for formal rate-smoothing methods.   *
***************************************************************
# Add phylum colors next to the tree tips
varpart_tree <- gheatmap(varpart_tree, phylum_colors, offset=-0.2, width=0.1, colnames=FALSE) +
   scale_fill_manual(values=colors_alphabetic)+
      labs(fill="Phylum")

#Reset fill scale to use a different colour profile in the heatmap
varpart_tree <- varpart_tree + new_scale_fill()

# Add variance stacked barplot
vertical_tree <-  varpart_tree +
       scale_fill_manual(values=c("#506a96","#cccccc","#be3e2b","#f6de6c"))+
        geom_fruit(
             data=varpart_table,
             geom=geom_bar,
             mapping = aes(x=value, y=genome, fill=variable, group=variable),
             pwidth = 2,
             offset = 0.05,
             width= 1,
             orientation="y",
             stat="identity")+
      labs(fill="Variable")

vertical_tree

8.3 Posterior estimates

# Select desired support threshold
support=0.9
negsupport=1-support

# Basal tree
postestimates_tree <- genome_tree

# Posterior estimate table
post_beta <- getPostEstimate(hM=m, parName="Beta")$support %>%
    as.data.frame() %>%
    mutate(variable=m$covNames) %>%
    pivot_longer(!variable, names_to = "genome", values_to = "value") %>%
    mutate(genome=factor(genome, levels=rev(postestimates_tree$tip.label))) %>%
    mutate(value = case_when(
          value >= support ~ "Positive",
          value <= negsupport ~ "Negative",
          TRUE ~ "Neutral")) %>%
    mutate(value=factor(value, levels=c("Positive","Neutral","Negative"))) %>%
    pivot_wider(names_from = variable, values_from = value) %>%
    #select(genome,sp_vulgaris,area_semi,area_urban,sp_vulgarisxarea_semi,sp_vulgarisxarea_urban,season_spring,season_winter,sp_vulgarisxseason_spring,sp_vulgarisxseason_winter) %>%
    column_to_rownames(var="genome")

#Phylums
phylum_colors <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
    mutate(phylum=str_remove_all(phylum, "p__")) %>%
    right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
    filter(genome %in% postestimates_tree$tip.label) %>%
    arrange(match(genome, postestimates_tree$tip.label)) %>%
    mutate(phylum = factor(phylum, levels = unique(phylum))) %>%
    column_to_rownames(var = "genome") %>%
    select(phylum)


colors_alphabetic <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
    mutate(phylum=str_remove_all(phylum, "p__")) %>%
    right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
    filter(genome %in% postestimates_tree$tip.label) %>%
    arrange(match(genome, postestimates_tree$tip.label)) %>%
     select(phylum, colors) %>%
    unique() %>%
    arrange(phylum) %>%
    select(colors) %>%
    pull()

# Basal ggtree
postestimates_tree <- postestimates_tree %>%
        force.ultrametric(.,method="extend") %>%
        ggtree(., size = 0.3)
***************************************************************
*                          Note:                              *
*    force.ultrametric does not include a formal method to    *
*    ultrametricize a tree & should only be used to coerce    *
*   a phylogeny that fails is.ultrametric due to rounding --  *
*    not as a substitute for formal rate-smoothing methods.   *
***************************************************************
#Add phylum colors next to the tree tips
postestimates_tree <- gheatmap(postestimates_tree, phylum_colors, offset=-0.2, width=0.1, colnames=FALSE) +
      scale_fill_manual(values=colors_alphabetic)+
      labs(fill="Phylum")

#Reset fill scale to use a different colour profile in the heatmap
postestimates_tree <- postestimates_tree + new_scale_fill()

# Add posterior significant heatmap

postestimates_tree <- gheatmap(postestimates_tree, post_beta, offset=0, width=0.5, colnames=TRUE, colnames_position="top",colnames_angle=90, colnames_offset_y=1, hjust=0) +
        scale_fill_manual(values=c("#be3e2b","#f4f4f4","#b2b530"))+
        labs(fill="Trend")

postestimates_tree +
        vexpand(.25, 1) # expand top 

## Correlations

#Compute the residual correlation matrix
OmegaCor = computeAssociations(m)

# Refernece tree (for sorting genomes)
genome_tree_subset <- genome_tree %>%
        keep.tip(., tip=m$spNames) 


#Co-occurrence matrix at the animal level
supportLevel = 0.95
toPlot = ((OmegaCor[[1]]$support>supportLevel)
          + (OmegaCor[[1]]$support<(1-supportLevel))>0)*OmegaCor[[1]]$mean

matrix <- toPlot %>% 
      as.data.frame() %>%
      rownames_to_column(var="genome1") %>%
      pivot_longer(!genome1, names_to = "genome2", values_to = "cor") %>%
      mutate(genome1= factor(genome1, levels=genome_tree_subset$tip.label)) %>%
      mutate(genome2= factor(genome2, levels=genome_tree_subset$tip.label)) %>%
      ggplot(aes(x = genome1, y = genome2, fill = cor)) +
            geom_tile() + 
            scale_fill_gradient2(low = "#be3e2b",
                       mid = "#f4f4f4",
                       high = "#b2b530")+
            theme_void()

htree <- genome_tree_subset %>%
  force.ultrametric(.,method="extend") %>%
  ggtree(.)
***************************************************************
*                          Note:                              *
*    force.ultrametric does not include a formal method to    *
*    ultrametricize a tree & should only be used to coerce    *
*   a phylogeny that fails is.ultrametric due to rounding --  *
*    not as a substitute for formal rate-smoothing methods.   *
***************************************************************
vtree <- genome_tree_subset %>%
  force.ultrametric(.,method="extend") %>%
  ggtree(.)
***************************************************************
*                          Note:                              *
*    force.ultrametric does not include a formal method to    *
*    ultrametricize a tree & should only be used to coerce    *
*   a phylogeny that fails is.ultrametric due to rounding --  *
*    not as a substitute for formal rate-smoothing methods.   *
***************************************************************
#create composite figure
grid.arrange(grobs = list(matrix,vtree),
             layout_matrix = rbind(c(2,1,1,1,1,1,1,1,1,1,1,1),
                                   c(2,1,1,1,1,1,1,1,1,1,1,1),
                                   c(2,1,1,1,1,1,1,1,1,1,1,1),
                                   c(2,1,1,1,1,1,1,1,1,1,1,1),
                                   c(2,1,1,1,1,1,1,1,1,1,1,1),
                                   c(2,1,1,1,1,1,1,1,1,1,1,1),
                                   c(2,1,1,1,1,1,1,1,1,1,1,1),
                                   c(2,1,1,1,1,1,1,1,1,1,1,1),
                                   c(2,1,1,1,1,1,1,1,1,1,1,1),
                                   c(2,1,1,1,1,1,1,1,1,1,1,1),
                                   c(2,1,1,1,1,1,1,1,1,1,1,1)))

8.4 Predict responses

# Select modelchain of interest
load("hmsc/fit_model1_250_10.Rdata")

gradient = c("domestic","feral")
gradientlength = length(gradient)

#Treatment-specific gradient predictions
pred <- constructGradient(m, 
                      focalVariable = "origin", 
                      non.focalVariables = list(logseqdepth=list(1),location=list(1))) %>%
            predict(m, Gradient = ., expected = TRUE) %>%
            do.call(rbind,.) %>%
            as.data.frame() %>%
            mutate(origin=rep(gradient,1000)) %>%
            pivot_longer(!origin,names_to = "genome", values_to = "value")
# weights:  9 (4 variable)
initial  value 101.072331 
final  value 91.392443 
converged

8.4.0.1 Function level

functions_table <- elements_table %>%
    to.functions(., GIFT_db) %>%
    as.data.frame()

community_functions <- pred %>%
  group_by(origin, genome) %>%
  mutate(row_id = row_number()) %>%
  pivot_wider(names_from = genome, values_from = value) %>%
  ungroup() %>%
  group_split(row_id) %>%
  as.list() %>%
  lapply(., FUN = function(x){x %>%
    select(-row_id) %>%
    column_to_rownames(var = "origin") %>%
    as.data.frame() %>%
    exp() %>%
    t() %>%
    tss() %>%
    to.community(functions_table,.,GIFT_db) %>% 
    as.data.frame() %>%
    rownames_to_column(var="origin")
   })
#max-min option
calculate_slope <- function(x) {
  lm_fit <- lm(unlist(x) ~ seq_along(unlist(x)))
  coef(lm_fit)[2]
}

function_predictions <- map_dfc(community_functions, function(mat) {
      mat %>%
        column_to_rownames(var = "origin") %>%
        t() %>%
        as.data.frame() %>%
        rowwise() %>%
        mutate(slope = calculate_slope(c_across(everything()))) %>%
        select(slope) }) %>%
      t() %>%
      as.data.frame() %>%
      set_names(colnames(community_functions[[1]])[-1]) %>%
      rownames_to_column(var="iteration") %>%
      pivot_longer(!iteration, names_to="trait",values_to="value") %>%
      group_by(trait) %>%
      summarise(mean=mean(value),
        p1 = quantile(value, probs = 0.1),
        p9 = quantile(value, probs = 0.9),
        positive_support = sum(value > 0)/1000,
        negative_support = sum(value < 0)/1000) %>%
      arrange(-positive_support)

# Positively associated
function_predictions %>%
  filter(mean >0) %>%
  arrange(-positive_support) %>%
  tt()
tinytable_h3c9aefxw3n7icr1e4rj
trait mean p1 p9 positive_support negative_support
D02 7.943818e-03 -0.0035659330 0.0203998706 0.814 0.186
B08 8.140762e-03 -0.0035118978 0.0196807401 0.787 0.213
B01 9.817798e-04 -0.0056385892 0.0083570721 0.632 0.368
S01 5.721836e-04 -0.0126077833 0.0130786482 0.565 0.435
B10 1.741428e-06 -0.0002973356 0.0002604708 0.486 0.514
# Negatively associated
function_predictions %>%
  filter(mean <0) %>%
  arrange(-negative_support) %>%
  tt()
tinytable_tnvhjwq827s89320fjgu
trait mean p1 p9 positive_support negative_support
D08 -1.214862e-03 -0.0022939461 -1.937513e-04 0.051 0.949
B03 -1.055852e-02 -0.0180263536 -2.778997e-03 0.055 0.945
D06 -3.137025e-03 -0.0067440893 2.240275e-05 0.101 0.899
B04 -8.879801e-03 -0.0199137473 7.496508e-04 0.128 0.872
B06 -7.017554e-03 -0.0175214946 2.467422e-03 0.178 0.822
D07 -1.232191e-02 -0.0308839790 4.181036e-03 0.181 0.819
D03 -4.200875e-03 -0.0132245471 3.655181e-03 0.213 0.787
D05 -1.360651e-03 -0.0074006070 4.341303e-03 0.241 0.759
S03 -9.012202e-03 -0.0328886683 1.716974e-02 0.271 0.729
B02 -3.478904e-03 -0.0124916077 5.407998e-03 0.284 0.716
S02 -5.147206e-03 -0.0156994292 2.781468e-03 0.302 0.698
B07 -3.782134e-03 -0.0165121548 9.410750e-03 0.330 0.670
D09 -1.847566e-03 -0.0086808845 5.390441e-03 0.330 0.670
B09 -2.027127e-05 -0.0005793415 4.283062e-04 0.366 0.634
D01 -2.273005e-04 -0.0052248355 4.232479e-03 0.436 0.564
positive <- function_predictions %>%
  filter(mean >0) %>%
  arrange(mean) %>%
  filter(positive_support>=0.9) %>%
  select(-negative_support) %>%
  rename(support=positive_support)

negative <- function_predictions %>%
  filter(mean <0) %>%
  arrange(mean) %>%
  filter(negative_support>=0.9) %>%
  select(-positive_support) %>%
  rename(support=negative_support)

bind_rows(positive,negative) %>%
  left_join(GIFT_db,by=join_by(trait==Code_function)) %>%
  mutate(trait=factor(trait,levels=c(rev(positive$trait),rev(negative$trait)))) %>%
  ggplot(aes(x=mean, y=fct_rev(trait), xmin=p1, xmax=p9, color=Function)) +
      geom_point() +
      geom_errorbar() +
      xlim(c(-0.02,0.02)) +
      geom_vline(xintercept=0) +
      scale_color_manual(values = c("#debc14","#440526","#dc7c17","#172742","#debc14","#440526","#dc7c17","#172742","#357379","#6c7e2c","#d8dc69","#774d35","#db717d")) +
      theme_minimal() +
      labs(x="Regression coefficient",y="Functional trait")

8.4.0.2 Element level

elements_table <- genome_gifts %>%
    to.elements(., GIFT_db) %>%
    as.data.frame()

community_elements <- pred %>%
  group_by(origin, genome) %>%
  mutate(row_id = row_number()) %>%
  pivot_wider(names_from = genome, values_from = value) %>%
  ungroup() %>%
  group_split(row_id) %>%
  as.list() %>%
  lapply(., FUN = function(x){x %>%
    select(-row_id) %>%
    column_to_rownames(var = "origin") %>%
    as.data.frame() %>%
    exp() %>%
    t() %>%
    tss() %>%
    to.community(elements_table,.,GIFT_db) %>% 
    as.data.frame() %>%
    rownames_to_column(var="origin")
   })

calculate_slope <- function(x) {
  lm_fit <- lm(unlist(x) ~ seq_along(unlist(x)))
  coef(lm_fit)[2]
}

element_predictions <- map_dfc(community_elements, function(mat) {
      mat %>%
        column_to_rownames(var = "origin") %>%
        t() %>%
        as.data.frame() %>%
        rowwise() %>%
        mutate(slope = calculate_slope(c_across(everything()))) %>%
        select(slope) }) %>%
      t() %>%
      as.data.frame() %>%
      set_names(colnames(community_elements[[1]])[-1]) %>%
      rownames_to_column(var="iteration") %>%
      pivot_longer(!iteration, names_to="trait",values_to="value") %>%
      group_by(trait) %>%
      summarise(mean=mean(value),
        p1 = quantile(value, probs = 0.1),
        p9 = quantile(value, probs = 0.9),
        positive_support = sum(value > 0)/1000,
        negative_support = sum(value < 0)/1000) %>%
      arrange(-positive_support)
# Positively associated
element_predictions %>%
  filter(mean >0) %>%
  arrange(-positive_support) %>%
  filter(positive_support>=0.9) %>%
  tt()
tinytable_i01v8e4fmawg7ofaj5ec
trait mean p1 p9 positive_support negative_support
D0205 0.012645450 0.0024775212 0.023274188 0.945 0.055
D0208 0.010176614 0.0022117782 0.018696465 0.945 0.055
D0906 0.003596117 0.0001275640 0.008020033 0.925 0.075
D0504 0.004451800 0.0003726435 0.009522305 0.916 0.084
B0103 0.008553518 0.0001228220 0.016976336 0.902 0.098
element_predictions %>%
  filter(mean <0) %>%
  arrange(-negative_support) %>%
  filter(negative_support>=0.9) %>%
  tt()
tinytable_1idfa8reivm1883dh4za
trait mean p1 p9 positive_support negative_support
D0801 -0.002090689 -0.002438983 -9.272219e-05 0.005 0.995
D0802 -0.002090689 -0.002438983 -9.272219e-05 0.005 0.995
D0517 -0.004532699 -0.007986505 -1.168789e-03 0.030 0.970
B0302 -0.005001966 -0.010562544 -5.506823e-04 0.039 0.961
D0601 -0.009173358 -0.017653493 -2.056762e-03 0.040 0.960
B0310 -0.012823144 -0.023057077 -2.492560e-03 0.045 0.955
B0219 -0.004220671 -0.010020412 -2.322877e-04 0.049 0.951
D0603 -0.001949497 -0.003952858 -3.128527e-04 0.049 0.951
D0611 -0.004163572 -0.009897145 -2.260342e-04 0.050 0.950
D0903 -0.004163572 -0.009897145 -2.260342e-04 0.050 0.950
B0709 -0.002067869 -0.003601898 -4.610807e-04 0.052 0.948
D0610 -0.002851369 -0.004825160 -6.045636e-04 0.061 0.939
D0807 -0.004248293 -0.008791774 -5.213497e-04 0.061 0.939
B0804 -0.016554792 -0.030214130 -2.413867e-03 0.066 0.934
B0303 -0.011651780 -0.021956618 -2.027232e-03 0.067 0.933
D0817 -0.004955047 -0.010619045 -4.375213e-04 0.067 0.933
B0603 -0.016917107 -0.033606646 -2.488392e-03 0.069 0.931
B0401 -0.012703245 -0.027187239 -1.024461e-03 0.075 0.925
D0606 -0.005849762 -0.011667610 -6.399018e-04 0.076 0.924
B0214 -0.020880218 -0.038383544 -2.819795e-03 0.078 0.922
D0508 -0.003423296 -0.007959844 -1.788522e-04 0.081 0.919
D0908 -0.015011644 -0.027596991 -1.822608e-03 0.084 0.916
D0816 -0.005769844 -0.012208972 -2.589436e-04 0.088 0.912
B0601 -0.009430120 -0.018345796 -4.765228e-04 0.089 0.911
B0204 -0.015628057 -0.033743640 -1.376524e-04 0.098 0.902
positive <- element_predictions %>%
  filter(mean >0) %>%
  arrange(mean) %>%
  filter(positive_support>=0.9) %>%
  select(-negative_support) %>%
  rename(support=positive_support)

negative <- element_predictions %>%
  filter(mean <0) %>%
  arrange(mean) %>%
  filter(negative_support>=0.9) %>%
  select(-positive_support) %>%
  rename(support=negative_support)

bind_rows(positive,negative) %>%
  left_join(GIFT_db,by=join_by(trait==Code_element)) %>%
  mutate(trait=factor(trait,levels=c(rev(positive$trait),rev(negative$trait)))) %>%
  ggplot(aes(x=mean, y=fct_rev(trait), xmin=p1, xmax=p9, color=Function)) +
      geom_point() +
      geom_errorbar() +
      xlim(c(-0.04,0.04)) +
      geom_vline(xintercept=0) +
      scale_color_manual(values = c("#debc14","#440526","#dc7c17","#172742","#debc14","#440526","#dc7c17","#172742","#357379","#6c7e2c","#d8dc69","#774d35","#db717d")) +
      theme_minimal() +
      labs(x="Regression coefficient",y="Functional trait")